<?php
	/*
		Crawler page: fetches data from the source site.
			Implements the basic scraping of acupoint data and saves it to the database.
	*/


	header('content-type:text/html;charset=UTF-8');
	require "MyPdo.class.php";
	$pdo = new MyPdo();

	// Remove PHP's execution time limit for this run.
	ini_set('max_execution_time', '0');
	$url = 'https://xuewei.supfree.net';

	// Download the index page (Curls converts GBK to UTF-8 by default).
	$res = Curls($url);

	// Collect every <li><a class="bblue" href="durex.asp?id=..."> entry of the index page.
	$p="/\<li\>\<a class=\"bblue\" href=\"durex.asp\?id\=.*\<\/a\>\<\/li\>/";
	preg_match_all($p,$res,$listItems);

	// Captures the numeric id and the link text of a single list entry.
	$p2="/\"durex\.asp\?id\=([0-9]+)\"\>([^<]*)\<.*/";

	// Build $data as a list of array(code, name, detail page URL).
	$data = array();
	foreach($listItems[0] as $itemHtml){
		preg_match_all($p2,$itemHtml,$fields);
		$code = $fields[1][0];		// acupoint code
		$name = $fields[2][0];		// acupoint name
		$data[] = array(
			$code,
			$name,
			'https://xuewei.supfree.net/durex.asp?id='.$code,		// acupoint description URL
		);
	}

	//dump($data,1);

	// Start at position 321 of $data; GetInfo stops once $count exceeds 40.
	$index = 321;
	$count = 0;
	GetInfo($data,$index,$pdo,$count);

	/*
		### Fetch acupoint detail pages and store their descriptions
		Starting at $data[$index], handles one entry per iteration until the end of
		$data is reached or $count exceeds 40. An entry whose name is already present
		in translate_info.lefts is reported and skipped; otherwise its detail page is
		downloaded, the "取穴方法" section is extracted and a new row is inserted.
		Progress is echoed as HTML.
		Param 1: &$data   array   list of array(code, name, detail page URL)
		Param 2: &$index  int     position in $data to handle next; advanced in place
		Param 3: $pdo     object  database helper providing dql() and dml()
		Param 4: &$count  int     number of entries handled so far; advanced in place
		return: nothing
	*/
	function GetInfo(&$data,&$index,$pdo,&$count){
		while(true){
			echo $index.'<br>';

			// Check the stop conditions before touching $data[$index]: reading the
			// entry first would hit an undefined offset and run a useless query
			// once $index has moved past the end of the list.
			if($index >= count($data)){
				return;
			}
			if($count > 40){
				return;
			}

			$name = $data[$index][1];
			$sql_c = "select id from translate_info where lefts = ?";
			// NOTE(review): the meaning of the third dql() argument is defined in MyPdo - confirm.
			$res_c = $pdo->dql($sql_c,array($name),1);

			if($res_c){
				echo $name.'exists !<br>';
			}else{
				$link = $data[$index][2];
				$page = Curls($link);

				// Keep only the bytes >= 0x80, i.e. drop all ASCII (markup, digits,
				// whitespace) and keep the multi-byte text of the page.
				preg_match_all("/([\x80-\xff]*)/i", $page, $chunks);
				$str = implode('', $chunks[0]);

				// Revision 1: take the text from "取穴方法" up to the next section heading.
				$zp2 = "/取穴方法.*生理解剖|取穴方法.*解剖位置|取穴方法.*〖解剖|取穴方法.*〖注意|取穴方法.*〖穴位解剖|取穴方法.*〖生理穴位|取穴方法.*取穴图/";
				preg_match_all($zp2, $str, $str2);
				// The page may not contain the section (or the download may have failed);
				// fall back to an empty description instead of reading a missing offset.
				$section = isset($str2[0][0]) ? $str2[0][0] : '';

				// Revision 2: strip the trailing heading that terminated the match.
				$zp3 = "/〖生理解剖|〖解剖位置|〖穴位解剖|〖注意|〖解剖|〖生理穴位/";
				$str3 = preg_replace($zp3,"",$section);		// description text

				$rights = $name."\r\n\r\n".$link."\r\n\r\n".$str3;
				echo $rights.'</br></br>';
				$sql = "insert into translate_info values(null,?,?,20)";
				$pdo->dml($sql,array($name,$rights));
				echo $index." is ok !<br><br><br><br>";
			}

			$index ++;
			$count ++;
		}
	}
	













/*
	### Fetch the content of a URL with cURL (GET request)
	Param 1: $url    string   address to fetch
	Param 2: $code1  string   encoding to convert from, default GBK
	Param 3: $code2  string   encoding to convert to, default UTF-8
	return: the fetched content as a string, converted from $code1 to $code2 with
	        iconv() unless both names denote the same encoding (compared
	        case-insensitively). If the transfer itself fails, '' is returned when
	        a conversion was requested and false otherwise. iconv() may return
	        false when the content cannot be converted.
	Example:
		$url = 'https://xuewei.supfree.net';
		echo Curls($url,'gbk');exit;
*/
function Curls($url,$code1='GBK',$code2='UTF-8'){
	$curlobj = curl_init();
	curl_setopt($curlobj,CURLOPT_URL,$url);
	curl_setopt($curlobj,CURLOPT_RETURNTRANSFER,true);
	// NOTE(review): certificate and host name verification are switched off, so
	// the HTTPS peer is not authenticated. Left unchanged because callers may
	// rely on it; re-enable if the target site's certificate validates.
	curl_setopt($curlobj, CURLOPT_SSL_VERIFYPEER, FALSE);
	curl_setopt($curlobj, CURLOPT_SSL_VERIFYHOST, FALSE);
	$res = curl_exec($curlobj);
	curl_close($curlobj);

	// Encoding names are case-insensitive ('utf-8' is 'UTF-8'), so compare them
	// that way and skip a pointless conversion pass.
	$sameEncoding = strcasecmp($code1, $code2) === 0;

	if($res === false){
		// Transfer failed: there is nothing to convert. The return values match
		// what this function has always produced in this situation.
		return $sameEncoding ? false : '';
	}
	if($sameEncoding){
		return $res;
	}
	return iconv($code1,$code2, $res);
}

/*
	### Debug helper: print a value with var_dump() wrapped in <pre> tags
	Param 1: $s   mixed   value to print
	Param 2: $t   bool    when truthy, terminate the script after printing; default false
*/
function dump($s,$t = false){
	echo '<pre>';
	var_dump($s);
	echo '</pre>';

	if(!$t){
		return;
	}
	exit;
}